\relax 
\providecommand\hyper@newdestlabel[2]{}
\providecommand\HyperFirstAtBeginDocument{\AtBeginDocument}
\HyperFirstAtBeginDocument{\ifx\hyper@anchor\@undefined
\global\let\oldcontentsline\contentsline
\gdef\contentsline#1#2#3#4{\oldcontentsline{#1}{#2}{#3}}
\global\let\oldnewlabel\newlabel
\gdef\newlabel#1#2{\newlabelxx{#1}#2}
\gdef\newlabelxx#1#2#3#4#5#6{\oldnewlabel{#1}{{#2}{#3}}}
\AtEndDocument{\ifx\hyper@anchor\@undefined
\let\contentsline\oldcontentsline
\let\newlabel\oldnewlabel
\fi}
\fi}
\global\let\hyper@last\relax 
\gdef\HyperFirstAtBeginDocument#1{#1}
\providecommand\HyField@AuxAddToFields[1]{}
\providecommand\HyField@AuxAddToCoFields[2]{}
\citation{goodfellow2016deep}
\citation{power2022grokking}
\citation{vaswani2017attention}
\@writefile{toc}{\contentsline {section}{\numberline {1}Introduction}{1}{section.1}\protected@file@percent }
\newlabel{sec:intro}{{1}{1}{Introduction}{section.1}{}}
\newlabel{sec:intro@cref}{{[section][1][]1}{[1][1][]1}}
\citation{goodfellow2016deep}
\citation{power2022grokking}
\citation{vaswani2017attention}
\@writefile{toc}{\contentsline {section}{\numberline {2}Background}{2}{section.2}\protected@file@percent }
\newlabel{sec:background}{{2}{2}{Background}{section.2}{}}
\newlabel{sec:background@cref}{{[section][2][]2}{[1][2][]2}}
\@writefile{toc}{\contentsline {subsection}{\numberline {2.1}Problem Setting}{2}{subsection.2.1}\protected@file@percent }
\citation{vaswani2017attention}
\citation{ba2016layer}
\@writefile{toc}{\contentsline {section}{\numberline {3}Method}{3}{section.3}\protected@file@percent }
\newlabel{sec:method}{{3}{3}{Method}{section.3}{}}
\newlabel{sec:method@cref}{{[section][3][]3}{[1][3][]3}}
\@writefile{toc}{\contentsline {subsection}{\numberline {3.1}Model Architecture}{3}{subsection.3.1}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {3.2}Input Representation}{3}{subsection.3.2}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {3.3}Data Augmentation Techniques}{3}{subsection.3.3}\protected@file@percent }
\@writefile{toc}{\contentsline {subsubsection}{\numberline {3.3.1}Operand Reversal}{3}{subsubsection.3.3.1}\protected@file@percent }
\@writefile{toc}{\contentsline {subsubsection}{\numberline {3.3.2}Operand Negation}{3}{subsubsection.3.3.2}\protected@file@percent }
\citation{loshchilov2017adamw}
\citation{paszke2019pytorch}
\citation{ba2016layer}
\@writefile{toc}{\contentsline {subsection}{\numberline {3.4}Augmentation Strategy}{4}{subsection.3.4}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {3.5}Training Procedure}{4}{subsection.3.5}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {3.6}Evaluation Metrics}{4}{subsection.3.6}\protected@file@percent }
\@writefile{toc}{\contentsline {section}{\numberline {4}Experimental Setup}{4}{section.4}\protected@file@percent }
\newlabel{sec:experimental}{{4}{4}{Experimental Setup}{section.4}{}}
\newlabel{sec:experimental@cref}{{[section][4][]4}{[1][4][]4}}
\citation{loshchilov2017adamw}
\@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces Validation accuracy over training steps for division operation under different augmentation strategies.\relax }}{5}{figure.caption.1}\protected@file@percent }
\providecommand*\caption@xref[2]{\@setref\relax\@undefined{#1}}
\newlabel{fig:val_acc_div}{{1}{5}{Validation accuracy over training steps for division operation under different augmentation strategies.\relax }{figure.caption.1}{}}
\newlabel{fig:val_acc_div@cref}{{[figure][1][]1}{[1][5][]5}}
\@writefile{toc}{\contentsline {section}{\numberline {5}Results}{5}{section.5}\protected@file@percent }
\newlabel{sec:results}{{5}{5}{Results}{section.5}{}}
\newlabel{sec:results@cref}{{[section][5][]5}{[1][5][]5}}
\@writefile{toc}{\contentsline {subsection}{\numberline {5.1}Addition in Modular Arithmetic}{6}{subsection.5.1}\protected@file@percent }
\@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces Validation accuracy over training steps for addition operation under different augmentation strategies.\relax }}{6}{figure.caption.2}\protected@file@percent }
\newlabel{fig:val_acc_add}{{2}{6}{Validation accuracy over training steps for addition operation under different augmentation strategies.\relax }{figure.caption.2}{}}
\newlabel{fig:val_acc_add@cref}{{[figure][2][]2}{[1][6][]6}}
\@writefile{toc}{\contentsline {subsection}{\numberline {5.2}Subtraction in Modular Arithmetic}{6}{subsection.5.2}\protected@file@percent }
\@writefile{lof}{\contentsline {figure}{\numberline {3}{\ignorespaces Validation accuracy over training steps for subtraction operation under different augmentation strategies.\relax }}{6}{figure.caption.3}\protected@file@percent }
\newlabel{fig:val_acc_sub}{{3}{6}{Validation accuracy over training steps for subtraction operation under different augmentation strategies.\relax }{figure.caption.3}{}}
\newlabel{fig:val_acc_sub@cref}{{[figure][3][]3}{[1][6][]6}}
\@writefile{toc}{\contentsline {subsection}{\numberline {5.3}Division in Modular Arithmetic}{7}{subsection.5.3}\protected@file@percent }
\@writefile{lof}{\contentsline {figure}{\numberline {4}{\ignorespaces Validation accuracy over training steps for division operation under different augmentation strategies.\relax }}{7}{figure.caption.4}\protected@file@percent }
\newlabel{fig:val_acc_div}{{4}{7}{Validation accuracy over training steps for division operation under different augmentation strategies.\relax }{figure.caption.4}{}}
\newlabel{fig:val_acc_div@cref}{{[figure][4][]4}{[1][7][]7}}
\@writefile{toc}{\contentsline {subsection}{\numberline {5.4}Comparative Analysis of Augmentation Strategies}{7}{subsection.5.4}\protected@file@percent }
\@writefile{lot}{\contentsline {table}{\numberline {1}{\ignorespaces Steps to 99\% validation accuracy for different operations and augmentation strategies.\relax }}{7}{table.caption.5}\protected@file@percent }
\newlabel{tab:steps_to_99}{{1}{7}{Steps to 99\% validation accuracy for different operations and augmentation strategies.\relax }{table.caption.5}{}}
\newlabel{tab:steps_to_99@cref}{{[table][1][]1}{[1][7][]7}}
\@writefile{toc}{\contentsline {subsection}{\numberline {5.5}Grokking Dynamics Analysis}{7}{subsection.5.5}\protected@file@percent }
\citation{power2022grokking}
\citation{vaswani2017attention}
\citation{power2022grokking}
\newlabel{fig:train_acc_div}{{5a}{8}{Training accuracy for division\relax }{figure.caption.6}{}}
\newlabel{fig:train_acc_div@cref}{{[subfigure][1][5]5a}{[1][7][]8}}
\newlabel{sub@fig:train_acc_div}{{a}{8}{Training accuracy for division\relax }{figure.caption.6}{}}
\newlabel{sub@fig:train_acc_div@cref}{{[subfigure][1][5]5a}{[1][7][]8}}
\newlabel{fig:train_loss_div}{{5b}{8}{Training loss for division\relax }{figure.caption.6}{}}
\newlabel{fig:train_loss_div@cref}{{[subfigure][2][5]5b}{[1][7][]8}}
\newlabel{sub@fig:train_loss_div}{{b}{8}{Training loss for division\relax }{figure.caption.6}{}}
\newlabel{sub@fig:train_loss_div@cref}{{[subfigure][2][5]5b}{[1][7][]8}}
\@writefile{lof}{\contentsline {figure}{\numberline {5}{\ignorespaces Training dynamics for division operation under different augmentation strategies.\relax }}{8}{figure.caption.6}\protected@file@percent }
\newlabel{fig:train_dynamics_div}{{5}{8}{Training dynamics for division operation under different augmentation strategies.\relax }{figure.caption.6}{}}
\newlabel{fig:train_dynamics_div@cref}{{[figure][5][]5}{[1][7][]8}}
\@writefile{toc}{\contentsline {subsection}{\numberline {5.6}Limitations and Considerations}{8}{subsection.5.6}\protected@file@percent }
\@writefile{toc}{\contentsline {section}{\numberline {6}Conclusions and Future Work}{8}{section.6}\protected@file@percent }
\newlabel{sec:conclusion}{{6}{8}{Conclusions and Future Work}{section.6}{}}
\newlabel{sec:conclusion@cref}{{[section][6][]6}{[1][8][]8}}
\bibstyle{iclr2024_conference}
\bibdata{references}
\bibcite{ba2016layer}{{1}{2016}{{Ba et~al.}}{{Ba, Kiros, and Hinton}}}
\bibcite{goodfellow2016deep}{{2}{2016}{{Goodfellow et~al.}}{{Goodfellow, Bengio, Courville, and Bengio}}}
\bibcite{loshchilov2017adamw}{{3}{2017}{{Loshchilov \& Hutter}}{{Loshchilov and Hutter}}}
\bibcite{paszke2019pytorch}{{4}{2019}{{Paszke et~al.}}{{Paszke, Gross, Massa, Lerer, Bradbury, Chanan, Killeen, Lin, Gimelshein, Antiga, et~al.}}}
\bibcite{power2022grokking}{{5}{2022}{{Power et~al.}}{{Power, Burda, Edwards, Babuschkin, and Misra}}}
\bibcite{vaswani2017attention}{{6}{2017}{{Vaswani et~al.}}{{Vaswani, Shazeer, Parmar, Uszkoreit, Jones, Gomez, Kaiser, and Polosukhin}}}
\ttl@finishall
\gdef \@abspage@last{9}
